RNA-seq-report

Author

EI Olekhnovich

Analysis of RNA-seq data

1. Import libraries.

library(vegan)
library(fgsea)
library(limma)
library(BioNERO)
library(DESeq2)
library(lmerTest)
library(permutes)
library(tidyverse)
library(immunedeconv)
library(clusterProfiler)

library(ggpubr)
library(ggplot2)
library(pheatmap)
library(RColorBrewer)
library(ComplexHeatmap)
library(EnhancedVolcano)

library(msigdbr)
library(biomaRt)
library(org.Hs.eg.db)
library(org.Mm.eg.db)
library(Orthology.eg.db)

library(DT)
  1. Import data and create DESeq2 object.
# Import data
meta_data <- read.csv("../data/metadata.tsv", sep = "\t")
meta_data$group <- as.factor(meta_data$group)
meta_data$batch <- as.factor(meta_data$batch)
meta_data$File <- meta_data$sampleid
meta_data <- meta_data[c(1,4,2,3)]
meta_data$File <- paste0(meta_data$File, ".counts")

ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = meta_data,
                                       directory = "../data/htseq/",
                                       design= ~ 0 + group + batch)
rownames(ddsHTSeq) <- sapply(str_split(rownames(ddsHTSeq), "\\."), function(x) x[1])
  1. Filter gene table by counts.
keep <- rowSums(counts(ddsHTSeq)>= 10) > ncol(ddsHTSeq)*0.3
ddsHTSeq <- ddsHTSeq[keep,]
  1. Differential expression analysis.
ddsHTSeq$group <- relevel(ddsHTSeq$group, ref = "M")
ddsHTSeq <- DESeq(ddsHTSeq)

contr_1 <- makeContrasts(groupM - groupM_BIF, levels = resultsNames(ddsHTSeq))
contr_2 <- makeContrasts(groupM - groupM_LAC, levels = resultsNames(ddsHTSeq))
contr_3 <- makeContrasts(groupM_BIF - groupM_LAC, levels = resultsNames(ddsHTSeq))

res_1 <- results(ddsHTSeq, contrast=contr_1)
res_2 <- results(ddsHTSeq, contrast=contr_2)
res_3 <- results(ddsHTSeq, contrast=contr_3)

5. Get normalized counts table.

COU <- counts(ddsHTSeq, normalized=TRUE)

datatable(COU, 
          caption = "Table 1. Normalized counts.",
          style = "bootstrap5", 
          extensions = 'Buttons', 
          options = list(dom = 'Bfrtip',
                         buttons = c('csv', 'excel'))) %>% 
    formatStyle(columns = colnames(COU), fontSize = '75%')
  1. Get DEG tables.
get_DEG <- function(res){
    DEG <- cbind(as.data.frame(res), COU)
    DEG <- DEG[!is.na(DEG$padj),]
    DEG <- DEG[order(DEG$log2FoldChange, decreasing = T),]
    return(DEG)
}

DEG_BIF <- get_DEG(res_1)
DEG_LAC <- get_DEG(res_2)
DEG_BIF_LAC <- get_DEG(res_3)

datatable(DEG_BIF, 
          caption = "Table 2. DEG M vs M_BIF.",
          style = "bootstrap5", 
          extensions = 'Buttons', 
          options = list(dom = 'Bfrtip',
                         buttons = c('csv', 'excel'))) %>% 
    formatStyle(columns = colnames(DEG_BIF), fontSize = '75%')
datatable(DEG_LAC, 
          caption = "Table 3. DEG M vs M_LAC.",
          style = "bootstrap5", 
          extensions = 'Buttons', 
          options = list(dom = 'Bfrtip',
                         buttons = c('csv', 'excel'))) %>% 
    formatStyle(columns = colnames(DEG_LAC), fontSize = '75%')
datatable(DEG_BIF_LAC, 
          caption = "Table 4. DEG M_BIF vs M_LAC.",
          style = "bootstrap5", 
          extensions = 'Buttons', 
          options = list(dom = 'Bfrtip',
                         buttons = c('csv', 'excel'))) %>% 
    formatStyle(columns = colnames(DEG_BIF_LAC), fontSize = '75%')
  1. Get volcano plots
VolcanoPlot1 <- EnhancedVolcano(DEG_BIF,
                lab = rownames(DEG_BIF),
                x = 'log2FoldChange',
                y = 'padj', pCutoff = 0.05, 
                subtitle = "", 
                title = "M vs M_BIF", labSize = 0)

VolcanoPlot2 <- EnhancedVolcano(DEG_LAC,
                lab = rownames(DEG_LAC),
                x = 'log2FoldChange',
                y = 'padj', pCutoff = 0.05, 
                subtitle = "", 
                title = "M vs M_LAC", 
                labSize = 0)

VolcanoPlot3 <- EnhancedVolcano(DEG_BIF_LAC,
                lab = rownames(DEG_BIF_LAC),
                x = 'log2FoldChange',
                y = 'padj', pCutoff = 0.05, 
                subtitle = "", 
                title = "M_BIF vs M_LAC", 
                labSize = 0)

volcano.all <- ggarrange(VolcanoPlot1, 
                         VolcanoPlot2, 
                         VolcanoPlot3, nrow = 1, 
                         common.legend = T)
volcano.all

Figure 1. Volcano plots show DEG between experimental groups.
  1. MSigDB GSEA.
pathwaysDF <- msigdbr("mouse", category="H")
pathways <- split(as.character(pathwaysDF$ensembl_gene), pathwaysDF$gs_name)

get_ranks <- function(DEG){
    ranks.df <- DEG$log2FoldChange
    names(ranks.df) <- rownames(DEG)
    names(ranks.df) <- sapply(str_split(names(ranks.df), "\\."), function(x) x[1])    
    ranks.df <- sort(ranks.df, decreasing = T)
    return(ranks.df)
}

ranks.bif <- get_ranks(DEG_BIF)
ranks.lac <- get_ranks(DEG_LAC)
ranks.bif.lac <- get_ranks(DEG_BIF_LAC)

fgsea.bif <- fgsea(pathways = pathways, 
                  stats    = ranks.bif,
                  eps      = 0.0,
                  minSize  = 15,
                  maxSize  = 500)

fgsea.lac <- fgsea(pathways = pathways, 
                     stats    = ranks.lac,
                     eps      = 0.0,
                     minSize  = 15,
                     maxSize  = 500)

fgsea.bif.lac <- fgsea(pathways = pathways, 
                   stats    = ranks.bif.lac,
                   eps      = 0.0,
                   minSize  = 15,
                   maxSize  = 500)

filter_pval <- function(fgsea.res){
    fgsea.res <- as.data.frame(fgsea.res)
    fgsea.res <- fgsea.res[fgsea.res$padj < 0.05,]
    fgsea.res <- fgsea.res[order(fgsea.res$NES, decreasing = T),]
    fgsea.res <- fgsea.res[-8]
    return(fgsea.res)
}

fgsea.bif.filter <- filter_pval(fgsea.bif)
fgsea.lac.filter <- filter_pval(fgsea.lac)
fgsea.bif.lac.filter <- filter_pval(fgsea.bif.lac)

datatable(fgsea.bif.filter, 
          caption = "Table 5. DEG M vs M_BIF.",
          style = "bootstrap5", 
          extensions = 'Buttons', 
          options = list(dom = 'Bfrtip',
                         buttons = c('csv', 'excel'))) %>% 
    formatStyle(columns = colnames(fgsea.bif.filter), fontSize = '75%')
datatable(fgsea.lac.filter, 
          caption = "Table 6. DEG M vs M_LAC.",
          style = "bootstrap5", 
          extensions = 'Buttons', 
          options = list(dom = 'Bfrtip',
                         buttons = c('csv', 'excel'))) %>% 
    formatStyle(columns = colnames(fgsea.lac.filter), fontSize = '75%')
datatable(fgsea.bif.lac.filter, 
          caption = "Table 7. DEG M_BIF vs M_LAC.",
          style = "bootstrap5", 
          extensions = 'Buttons', 
          options = list(dom = 'Bfrtip',
                         buttons = c('csv', 'excel'))) %>% 
    formatStyle(columns = colnames(fgsea.bif.lac.filter), fontSize = '75%')
get_subset <- function(df, group){
    df.sbs <- df[c(1,6)]
    df.sbs$group <- group
    return(df.sbs)
}

fgsea.bif.sbs <- get_subset(fgsea.bif.filter, "M vs M_BIF")
fgsea.lac.sbs <- get_subset(fgsea.lac.filter, "M vs M_LAC")
fgsea.bif.lac.sbs <- get_subset(fgsea.bif.lac.filter, "M_BIF vs M_LAC")

fgsea.all <- rbind(fgsea.bif.sbs, fgsea.lac.sbs, fgsea.bif.lac.sbs)
fgsea.all <- spread(fgsea.all, group, NES, fill = 0)
rownames(fgsea.all) <- fgsea.all$pathway
fgsea.all <- fgsea.all[-1]

heatmap.hallmark <- pheatmap::pheatmap(as.matrix(fgsea.all), cutree_rows = 7, cutree_cols = 2, display_numbers = T)
heatmap.hallmark

Figure 2. MSigDB Hallmark GSEA heatmap.